##########################################################
# Project:    Talking to the Populist Radical Right
# Task:       The script counts the mentions of parties in 
#             speeches in the Dutch parliament and
#             annotates the data set
# Author:     Jan Schwalbach (21/07/2022)
##########################################################

# Loading packages and data

library(quanteda)
library(ggrepel)
library(grid)
library(zoo)
library(pscl)
library(aod)
library(ggplot2)
library(stringr)

load(file = "Corpus_Netherlands.Rdata")

# Restricting the corpus to the speeches that are analysed
#
# A speech is kept only if
#   - its type is not "0",
#   - its party is known and is not one of the excluded parties,
#   - it was not given by the speaker (voorzitter),
#   - its date is on or after 2006-11-22.
# Rows with a missing value in any of these fields are dropped.
#
# All conditions are combined into a single NA-free mask. Indexing rows
# with a condition that evaluates to NA does not drop the row but turns
# it into an all-NA row, so every comparison is paired with an explicit
# is.na() check.

excluded_parties <- c("other", "NA", "GPV", "PvdD", "RPF", "SGP", "LPF")

keep_speech <-
  !is.na(corpusNL$type) & corpusNL$type != "0" &
  !is.na(corpusNL$party) & !(corpusNL$party %in% excluded_parties) &
  !is.na(corpusNL$voorzitter) & corpusNL$voorzitter != TRUE &
  !is.na(corpusNL$date) & corpusNL$date >= "2006-11-22"

corpusNL <- corpusNL[keep_speech, ]

# Annotating speeches for topics
#
# A speech is flagged for a topic (1) when its presence indicator equals
# "TRUE" or when its topic count is larger than 2. Speeches with a topic
# count of exactly 0 are reset to 0 afterwards, whatever the presence
# indicator says. Missing values never select a row, so such speeches
# keep the value they had before the respective step.

corpusNL$immigration <- 0
corpusNL$immigration[which(corpusNL$migpres1 == "TRUE")] <- 1
corpusNL$immigration[which(corpusNL$migrationcount > 2)] <- 1
corpusNL$immigration[which(corpusNL$migrationcount == 0)] <- 0

corpusNL$education <- 0
corpusNL$education[which(corpusNL$educationpres1 == "TRUE")] <- 1
corpusNL$education[which(corpusNL$educationcount > 2)] <- 1
corpusNL$education[which(corpusNL$educationcount == 0)] <- 0

# Loading the dictionaries and counting for each party
#
# For every party <P> the file "<P>.csv" (semicolon separated,
# WINDOWS-1252 encoded) is read and its column <P> supplies the search
# terms. Two columns are added to corpusNL per party:
#   <P>count  number of dictionary matches in the speech text
#   <P>pres   TRUE if there is at least one match
#
# Each term is wrapped in single spaces and the terms are joined as
# alternatives, i.e. the pattern is " term1 | term2 | ... ". str_count()
# interprets the pattern as a regular expression.
# NOTE(review): because every alternative needs a space on both sides,
# mentions directly next to punctuation or at the very start/end of a
# speech are not counted, and two mentions separated by a single space
# count once - confirm this is intended.

corpusNL$text <- corpusNL$raw_text

# Reads the dictionary file of one party and returns its search pattern.
build_party_pattern <- function(party) {
  dictionary <- read.csv(
    file = paste0(party, ".csv"), fileEncoding = "WINDOWS-1252",
    stringsAsFactors = FALSE, header = TRUE, sep = ";"
  )

  # exact = FALSE keeps the partial name matching that `dictionary$<P>`
  # performs on a data frame.
  terms <- dictionary[[party, exact = FALSE]]
  if (is.null(terms)) {
    stop("Column '", party, "' not found in ", party, ".csv", call. = FALSE)
  }

  terms <- str_trim(terms, side = "both")
  # Drop empty and missing cells; a missing cell would otherwise be
  # pasted into the pattern as the literal text "NA".
  terms <- terms[!is.na(terms) & terms != ""]
  if (length(terms) == 0) {
    # Without terms the pattern would consist of two spaces only.
    stop("No dictionary terms found in ", party, ".csv", call. = FALSE)
  }

  paste0(" ", paste(terms, collapse = " | "), " ")
}

dictionary_parties <- c("CDA", "CU", "D66", "GL", "PvdA", "PVV", "SP", "VVD")

# Named character vector: one search pattern per party.
party_patterns <- vapply(dictionary_parties, build_party_pattern, character(1))

for (party in dictionary_parties) {
  mention_count <- str_count(corpusNL$text, party_patterns[[party]])
  corpusNL[[paste0(party, "count")]] <- mention_count
  corpusNL[[paste0(party, "pres")]] <- mention_count > 0
}


# Combining all party counts
#
# partycounts has one row per speaking party and holds
#   CDA ... VVD  total number of mentions of that party in the speeches
#                of the speaking party (mentions of the own party are
#                set to 0)
#   party        the speaking party
#   PVV_all      mentions of the PVV divided by the mentions of all
#                eight parties
#
# All totals come from a single aggregate() call, so the rows of the
# individual count columns are aligned by construction.

counted_parties <- c("CDA", "CU", "D66", "GL", "PvdA", "PVV", "SP", "VVD")

mention_columns <- lapply(
  counted_parties,
  function(p) corpusNL[[paste0(p, "count")]]
)
names(mention_columns) <- counted_parties

partycounts <- aggregate(
  data.frame(mention_columns, check.names = FALSE),
  by = list(party = corpusNL$party),
  FUN = sum
)

# Count columns first, speaking party afterwards.
partycounts <- partycounts[c(counted_parties, "party")]

# A party mentioning itself is not of interest.
for (p in counted_parties) {
  partycounts[[p]][partycounts$party == p] <- 0
}

partycounts$PVV_all <- partycounts$PVV / rowSums(partycounts[counted_parties])


# Counting the shares for all/left/right parties
#
# countsall    every speaking party except the PVV
# countsleft   countsall without CDA, CU and VVD
# countsright  countsall without PvdA, GL, D66 and SP
# The three means are the average PVV_all value of the respective group.

not_left <- c("CDA", "CU", "VVD")
not_right <- c("PvdA", "GL", "D66", "SP")

countsall <- partycounts[!(partycounts$party %in% "PVV"), ]
countsleft <- countsall[!(countsall$party %in% not_left), ]
countsright <- countsall[!(countsall$party %in% not_right), ]

mean(countsleft$PVV_all)
mean(countsright$PVV_all)
mean(countsall$PVV_all)

# Preparing and annotating the data set for the logit regression
#
# Speeches of the PVV itself are removed. Three indicators are added:
#   left        1 for PvdA, GL, D66 and SP, otherwise 0
#   mainstream  1 for PvdA, CDA and VVD, otherwise 0
#   RWdummy     PVV mention count capped at 1 (1 = PVV mentioned)

corpusNL <- corpusNL[corpusNL$party != "PVV",]

# Left parties

left_parties <- c("PvdA", "GL", "D66", "SP")
corpusNL$left <- as.numeric(corpusNL$party %in% left_parties)

# Mainstream parties

mainstream_parties <- c("PvdA", "CDA", "VVD")
corpusNL$mainstream <- as.numeric(corpusNL$party %in% mainstream_parties)

# Dummy for mentioning the PVV

pvv_mentions <- corpusNL$PVVcount
pvv_mentions[which(pvv_mentions >= 1)] <- 1
corpusNL$RWdummy <- pvv_mentions
table(corpusNL$RWdummy)

# Term
#
# Term 2 covers 2010-06-09 up to (not including) 2012-09-12, term 3
# covers 2012-09-12 up to (not including) 2017-03-15. Every other speech
# is assigned to term 1.
# NOTE(review): speeches dated on or after 2017-03-15 match neither
# window and therefore also end up in term 1 - confirm that the corpus
# contains no such speeches.

speech_date <- corpusNL$date
in_second_term <- speech_date >= "2010-06-09" & speech_date < "2012-09-12"
in_third_term <- speech_date >= "2012-09-12" & speech_date < "2017-03-15"

corpusNL$term <- 1
corpusNL$term[in_second_term] <- 2
corpusNL$term[in_third_term] <- 3
corpusNL$country <- "Netherlands"

# Government Type/party and right-wing populist size
#
# minority and support are 1 in term 2 only; RRP_size takes one fixed
# value per term (6, 16 and 10 for terms 1, 2 and 3).

corpusNL$minority <- as.numeric(corpusNL$term == 2)
corpusNL$RRP_size <- c(6, 16, 10)[corpusNL$term]
corpusNL$support <- as.numeric(corpusNL$term == 2)

# Parties coded as "Government", one entry per term (1, 2, 3). All
# remaining speeches are coded as "Opposition".
cabinet_parties <- list(
  c("CDA", "CU", "PvdA"),
  c("CDA", "VVD"),
  c("PvdA", "VVD")
)

corpusNL$Government <- "Opposition"
for (term_id in seq_along(cabinet_parties)) {
  in_cabinet <- corpusNL$term == term_id &
    corpusNL$party %in% cabinet_parties[[term_id]]
  corpusNL$Government[in_cabinet] <- "Government"
}

corpusNL <- corpusNL[corpusNL$date >= "2006-11-22", ]

# Keeping the variables used in the regression and saving the data set

model_variables <- c(
  "RWdummy", "immigration", "education", "left", "mainstream", "type",
  "term", "minority", "RRP_size", "support", "Government", "country"
)
Netherlands <- subset(corpusNL, select = model_variables)
save(Netherlands, file = "Netherlands_party_mentions.Rdata") # save corpus